{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from matplotlib import rc\n",
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "# plot style\n",
    "sns.set_style('whitegrid')\n",
    "sns.set_style({'font.family': 'Times New Roman'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"data/readability_metrics.csv\", delimiter='\\t', \\\n",
    "                 names=['doi','filename', 'line_no', 'avg_line_len', 'max_line_len', 'avg_indentation', 'max_indentation', 'avg_numbers', 'avg_comments', 'max_numbers', 'avg_periods', 'avg_commas', 'avg_spaces', 'avg_parentheses', 'avg_arithmetic_operators', 'avg_comparison_operators', 'avg_assignments', 'avg_branches', 'avg_loops', 'avg_keywords', 'max_keywords', 'avg_blank_lines', 'max_occurrence_of_character', 'avg_vars_len', 'max_vars_len', 'avg_vars_count', 'max_vars_count', 'max_occurence_of_var', 'vars'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "3070\n"
    }
   ],
   "source": [
    "all_vars = []\n",
    "onetwo = 0\n",
    "\n",
    "def all_vars_f(el):\n",
    "    els = el.split(\";\")\n",
    "    all_vars.extend(els)\n",
    "\n",
    "    for a in els:\n",
    "        if a == 'r' or a == 'R':\n",
    "            continue\n",
    "        if len(a) <3:\n",
    "            return 1\n",
    "    return 0 \n",
    "\n",
    "df['small_var'] = df[\"vars\"].dropna().apply(all_vars_f)\n",
    "print(len(df[\"vars\"].dropna()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "621.0"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['small_var'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "{'MH.bound.se': 2,\n 'pewmafilter': 3,\n 'sev.t': 4,\n 'balance.iptw': 3,\n 'set<-': 2,\n 'count_ocwf_SCF53': 1,\n 'count_ocwf_SCF52': 1,\n 'count_ocwf_SCF51': 1,\n 'count_ocwf_SCF50': 1,\n 'count_ocwf_SCF55': 1,\n 'count_ocwf_SCF54': 1,\n 'percentrock_SCF60c': 1,\n 'percentrock_SCF60b': 1,\n 'percentrock_SCF60a': 1,\n 'obj.lquote.wh.labs': 2,\n 'eff.num.gov.parties': 1,\n 'nboot': 2,\n 'norm.posterior': 1,\n 'clusteredSE_ARRSM': 16,\n 'arch_lag': 4,\n 'X.boot': 2,\n 'n.run': 2,\n 'VCD_limit': 3,\n 'na.zero': 2,\n 'papsamex': 1,\n 'mod.aa.extra2': 1,\n 'crossValidation': 1,\n 'plot_ass_words': 2,\n 'translateChunk': 6,\n 'resp.bias': 11,\n 'emp_number': 2,\n 'range01': 1,\n 'typenames': 3,\n 'p.vals.male': 1,\n 'dist_crime': 1,\n 'biomass_lcod_SCF55': 1,\n 'biomass_lcod_SCF54': 1,\n 'biomass_lcod_SCF53': 1,\n 'biomass_lcod_SCF52': 1,\n 'biomass_lcod_SCF51': 1,\n 'biomass_lcod_SCF50': 1,\n 'combine_datasets': 1,\n 'k': 40,\n 'm.year.suit': 1,\n 'theme_bw_finegrid_special': 3,\n 'txo': 1,\n 'county100': 1,\n 'sdata': 3,\n 'count_lcod_SCG06': 1,\n 'count_lcod_SCG07': 1,\n 'count_lcod_SCG04': 1,\n 'count_lcod_SCG05': 1,\n 'count_lcod_SCG02': 1,\n 'count_lcod_SCG03': 1,\n 'mstepfunction': 4,\n 'count_lcod_SCG01': 1,\n 'plot.coefs': 3,\n 'popul1lo': 1,\n 'biomass_copp_SCG52a': 1,\n 'biomass_copp_SCG52b': 1,\n 'Timor_Leste': 2,\n 'dnb': 5,\n 'logmsg': 10,\n 'tokenize': 6,\n 'disc_sp': 3,\n 'rename_factors': 2,\n 'R_output15': 4,\n 'series_name': 7,\n 'preprocess': 4,\n 'dpcr': 1,\n 'excl': 1,\n 'format.rc.binary': 2,\n 'datacfrfull': 2,\n 'EDE': 4,\n 'cces.bc': 1,\n 'weighted_sd': 10,\n 'qual.lo': 2,\n 'p.vals.inc': 1,\n 'label_size': 5,\n 'get.inference': 4,\n 'H.Dem.STD.NOM.median': 5,\n 'type9': 2,\n 'type8': 2,\n 'want': 12,\n 'dtsam': 1,\n 'type4': 2,\n 'type7': 2,\n 'type6': 2,\n 'type1': 2,\n 'GeomMarginBoxplot': 4,\n 'DPI': 13,\n 'rf_to_viz': 7,\n 'weighted_var': 10,\n 'TMT_CCES12': 7,\n 'significance': 1,\n 'classify': 11,\n 'A': 2,\n 'regime_density': 4,\n 'n_activists_journal_original_peer': 1,\n 'DW_NOM_Rates': 7,\n 'count_rock_SCG52b': 1,\n 'count_rock_SCG52a': 1,\n 'cor_labele': 2,\n 'types': 3,\n 'clArgs': 44,\n 'sqsp': 1,\n 'd_list': 1,\n 'vars': 11,\n 'f.preprocess': 1,\n 'varfun.Shafir.1': 4,\n 'varz': 3,\n 'count_cash_SCG56': 1,\n 'count_cash_SCG57': 1,\n 'count_cash_SCG55': 1,\n 'count_cash_SCG50': 1,\n 'count_cash_SCG51': 1,\n 'Rmd2htmlWP': 4,\n 'count_cash_SCG58': 1,\n 'bjps_rep': 5,\n 'cate.hat.hightalk.therm': 1,\n 'cum_precision_sizes': 2,\n 'fix': 3,\n 'p.EDE': 4,\n 'fig': 4,\n 'FirstEmp_list': 2,\n 'pc_skew': 23,\n 'pcurve_prep': 25,\n 'plotigammaprior': 16,\n 'geom_marginboxplot': 4,\n 'count_hfbd_SCF60a': 1,\n 'cate.hat.highknow': 1,\n 'count_hfbd_SCF60c': 1,\n 'plot_dists': 1,\n 'cox.ed.gam': 3,\n 'beta12': 2,\n 'lin.mean': 4,\n 'z.Wq': 1,\n 'z.Wr': 1,\n 'fold_path': 4,\n 'arrow': 1,\n 'debug': 6,\n 'AMEselection': 2,\n 'gen.outcome.vars': 4,\n 'H.Rep.STD.IRT.median': 5,\n 'mean.prior': 1,\n 'rt': 1,\n 'rv': 3,\n 'rp': 3,\n 'rs': 2,\n 'addSCNet': 4,\n 're': 4,\n 'rf': 3,\n 'rg': 3,\n 'rh': 7,\n 'count_olyt_SCG58': 1,\n 'count_olyt_SCG55': 1,\n 'count_olyt_SCG57': 1,\n 'count_olyt_SCG56': 1,\n 'count_olyt_SCG51': 1,\n 'count_olyt_SCG50': 1,\n 'subst_SCG53a': 1,\n 'estimate': 4,\n 'max.na': 6,\n 'r1': 4,\n 'Rtc': 15,\n 'r3': 2,\n 'takeout': 2,\n 'att': 11,\n 'count_drrf_SCG53b': 1,\n 'count_drrf_SCG53a': 1,\n 'Votes': 5,\n 'data1A': 4,\n 'sutherland': 2,\n 'binplot': 2,\n 'technical.edu': 2,\n 'highci': 1,\n 'PolePeaks': 1,\n 'master': 1,\n 'wordXX': 5,\n 'logbreak': 10,\n 'otherhou_voter_low': 1,\n 'forpaper': 2,\n 'CoverInter': 2,\n 'cdata': 2,\n 'distribution_to_estimate': 4,\n 'E.1S1D': 1,\n 'count_prch_SCF01': 1,\n 'count_prch_SCF06': 1,\n 'biomass_bcac_SCG56': 1,\n 'intersectFast': 2,\n 'es.num.wb': 2,\n 'f.splitroles': 2,\n 'es.num.wh': 2,\n 'biomass_casc_SCG50': 1,\n 'biomass_casc_SCG51': 1,\n 'biomass_casc_SCG56': 1,\n 'biomass_casc_SCG57': 1,\n 'biomass_casc_SCG55': 1,\n 'biomass_hfbd_SCF60a': 1,\n 'biomass_casc_SCG58': 1,\n 'biomass_hfbd_SCF60b': 1,\n 'avgData.maker': 1,\n 'main_effect': 1,\n 'ests': 4,\n 'm5s.cln.num_female_users': 1,\n 'simData.QRP': 8,\n 'mjew12': 7,\n 'paper_desc': 8,\n 'balance_other': 1,\n 'count_cuke_SCF01': 1,\n 'strip.junk': 1,\n 'Clapp_Hornberger': 1,\n 'EM1.Block': 10,\n 'LogHyper': 1,\n 'hab_SCF52': 1,\n 'hab_SCF53': 1,\n 'hab_SCF50': 1,\n 'hab_SCF51': 1,\n 'hab_SCF54': 1,\n 'hab_SCF55': 1,\n 'ungar': 1,\n 'es.pct.wh.labs': 2,\n 'correcttot.eb': 1,\n 'obj.hqpct.wb': 2,\n 'principalstratmod_monoind': 13,\n 'obj.hqpct.wh': 2,\n 'f.temp': 3,\n 'set.Arial': 4,\n 'wage.vars': 4,\n 'biomass_lcod_SCG53b': 1,\n 'E.S1D': 1,\n 'biomass_lcod_SCG53a': 1,\n 'green1': 4,\n 'green3': 4,\n 'green2': 4,\n 'x.usesen.small': 17,\n 'auditdata': 8,\n 'wordfish_rank_plot_apriori_ordering': 1,\n 'rice': 2,\n 'AK2logLik_LLH': 19,\n 'anes2004': 1,\n 'wildboot_variance': 1,\n 'obj.mat': 2,\n 'D.prior.vars': 2,\n 'obj.lquote.wb.labs': 2,\n 'X': 25,\n 'getLMFormula': 31,\n 'datafile': 2,\n 'gilm.tab.spine': 1,\n 'biomass_vrmlcnry_SCG01': 1,\n 'biomass_vrmlcnry_SCG02': 1,\n 'biomass_vrmlcnry_SCG03': 1,\n 'biomass_vrmlcnry_SCG04': 1,\n 'biomass_vrmlcnry_SCG05': 1,\n 'biomass_vrmlcnry_SCG06': 1,\n 'biomass_vrmlcnry_SCG07': 1,\n 'x.usesens.small.easy': 4,\n 'GenCompData': 12,\n 'd_cases': 1,\n 'mod.ind.extra2': 1,\n 'result': 4,\n 'ClusteredSE_AK': 19,\n 'parpfilter': 3,\n 'contr.summary': 2,\n 'score': 2,\n 'biomass_prch_SCG62': 1,\n 'theme_map': 2,\n 'biomass_prch_SCG60': 1,\n 'CoverZ': 2,\n 'univlist': 6,\n 'betweenlist': 3,\n 'CARIRT': 13,\n 'calc.var.tau.sd': 5,\n 'g2_stage1': 1,\n 'g2_stage2': 1,\n 'clog.loglog.ind.ll': 1,\n 'boot.cluster': 1,\n 'H4_06': 3,\n 'H4_07': 3,\n 'H4_05': 3,\n 'post.bh': 2,\n 'diff': 6,\n 'combine_data': 1,\n 'build_slopegraph': 1,\n 'H4_08': 3,\n 'H4_09': 3,\n 'ep.wf.negbin.b': 8,\n 'sqLiteConnect': 14,\n 'probit.seq': 3,\n 'matrix2dyads': 2,\n 'cue': 11,\n 'beta3': 1,\n 'beta2': 1,\n 'beta1': 1,\n 'm4n': 7,\n 'scare.coef': 2,\n 'count_rock_SCF55': 1,\n 'count_rock_SCF54': 1,\n 'count_rock_SCF51': 1,\n 'count_rock_SCF50': 1,\n 'count_rock_SCF53': 1,\n 'count_rock_SCF52': 1,\n 'percentrock_SCG53b': 1,\n 'percentrock_SCG53a': 1,\n 'm5s_group_ocd': 17,\n 'na.coef.var.restrict': 1,\n 'plotlinealpha': 2,\n 'numboot': 4,\n 'fol_path': 3,\n 'Campbell': 1,\n 'NwNwN': 5,\n 'read_qualtrics_csv': 1,\n 'count_lcod_SCG60': 1,\n 'count_lcod_SCG62': 1,\n 'sum.offer.alt': 1,\n 'Dem.STD.IRT.median': 5,\n 'craigslist': 5,\n 'split': 1,\n 'sensitivity.vintage': 2,\n 'g4_stage2': 1,\n 'g4_stage1': 1,\n 'filenames': 3,\n 'abs.diff': 3,\n 'trunctedProp': 4,\n 'matchCond.unique': 2,\n 'figA1b': 3,\n 'figA1c': 3,\n 'figA1a': 3,\n 'rrcor': 1,\n 'm5s.cln.first_timestamp': 1,\n 'pred.multinom.CUSTOM': 2,\n 'polar.index.STD.IRT': 5,\n 'house107': 5,\n 'kros': 1,\n 'SelectionTable': 10,\n 'DRC': 2,\n 'house109': 5,\n 'house108': 5,\n 'it10': 2,\n 'agreefunc': 1,\n 'g3_stage2': 1,\n 'g3_stage1': 1,\n 'conquestsadj': 1,\n 'ProbBin': 1,\n 'clinton_voter_low': 1,\n 'cosponsorship_network': 44,\n 'bho.therm3': 2,\n 'bho.therm2': 2,\n 'mod_stargazer': 3,\n 'theme_bw_finegrid': 3,\n 'es.num.wh.labs': 2,\n 'gov.cohesion': 1,\n 'along_labs': 3,\n 'left1': 1,\n 'left2': 1,\n 'pid_lo': 1,\n 'in.GR': 1,\n 'ols': 3,\n 'subtle.boxplot': 3,\n 'txt': 1,\n 'matchedusersseg': 2,\n 'varfun.Knobe.1': 4,\n 'obj.lfstruct.wh': 2,\n 'fl.boots': 1,\n 'SWtestE': 3,\n 'foo': 2,\n 'obj.lfstruct.wb': 2,\n 'SWtest0': 3,\n 'H3_plot': 3,\n 'small_multiple_margin_right': 5,\n 'circrates': 1,\n 'o': 5,\n 'mod.ind.control': 1,\n 'panelBorderCol': 48,\n 'do_sim': 1,\n 'x.usesens.small.kbid': 3,\n 'write.xlsx.list': 2,\n 'pred_leg_two_controls': 1,\n 'cor.mtest': 2,\n 'convertES': 4,\n 'count_prch_SCG55': 1,\n 'count_prch_SCG56': 1,\n 'count_prch_SCG57': 1,\n 'count_prch_SCG50': 1,\n 'count_prch_SCG51': 1,\n 'es.pct.a': 2,\n 'es.pct.c': 2,\n 'es.pct.b': 2,\n 'multi.PLOT': 5,\n 'coef6': 5,\n 'house.total': 2,\n 'coef1': 24,\n 'coef2': 8,\n 'coef3': 5,\n 'plot_dat23': 8,\n 'plot_dat22': 8,\n 'plot_dat21': 8,\n 'plot_dat25': 8,\n 'plot_dat24': 8,\n 'max_lag_ARCH': 3,\n 'legendGrob': 3,\n 'my.print.xtable': 1,\n 'track.changes.history.all': 2,\n 'dataset.C': 1,\n 'dataset.B': 1,\n 'dataset.A': 1,\n 'paneld': 15,\n 'get_coverage2': 1,\n 'count_hfbd_SCG53a': 1,\n 'count_hfbd_SCG53b': 1,\n 'diff.cates.other.non': 1,\n 'avshiftclosertoright': 1,\n 'td.lo': 1,\n 'q.bc': 1,\n 'fraga1_CandT_W': 1,\n 'UpBound': 1,\n 'scale_colour_soa': 2,\n 'bootCFM': 1,\n 'fraga1_CandT_B': 1,\n 'fraga1_CandT_A': 1,\n 'fraga1_CandT_L': 1,\n 'biomass_lcod_SCG58': 1,\n 'experts_proportions': 1,\n 'ncols': 2,\n 'models': 1,\n 'biomass_lcod_SCG50': 1,\n 'biomass_lcod_SCG51': 1,\n 'biomass_lcod_SCG56': 1,\n 'biomass_lcod_SCG57': 1,\n 'sample_list': 1,\n 'biomass_lcod_SCG55': 1,\n 'subj.a': 2,\n 'subj.b': 2,\n 'subj.c': 2,\n 'variable': 4,\n 'allequal': 2,\n 'x.control.reject': 1,\n 'generate.lm': 1,\n 'raw_MID_participant': 43,\n 'f.lik.o.indicators': 2,\n 'time': 2,\n 'pol.size.large': 11,\n 'write_figures': 4,\n 'chain': 1,\n 'abline.point': 2,\n 'f.lik.o.legmeans': 2,\n 'Mat.gen': 6,\n 'yrange': 1,\n 'RRchild1_inc': 2,\n 'indivs': 1,\n 'RRhart2': 2,\n 'get.OSFfile': 4,\n 'TMT_CCES10_EIT': 7,\n 'demsen_voter_low': 1,\n 'StandVar': 1,\n 'trans.times': 2,\n 'T_standard_ISO': 1,\n 'm5s.cln.num_male_users': 1,\n 'beta.usesen.small': 13,\n 'multinomial.mIRT': 2,\n 'add_labels': 3,\n 'xconst_labels': 3,\n 'plot_conv_all': 2,\n 'other_voter_high': 1,\n 'prediction': 1,\n 'rbeta.s.gibbs': 4,\n 'theta.betaX.d.srE.ef': 1,\n 'dat_gdp': 4,\n 's110': 17,\n 'simex_model': 1,\n 'es.any.wh': 2,\n 'es.any.wb': 2,\n 'varfun.Hauser.1': 4,\n 'varfun.Hauser.2': 4,\n 'along': 3,\n 'roles2': 2,\n 'AMFg_FN': 3,\n 'AuthorID': 2,\n 'biomass_vrmlcnry_SCG62': 1,\n 'count_bcac_SCF52': 1,\n 'biomass_vrmlcnry_SCG60': 1,\n 'count_bcac_SCF50': 1,\n 'shareAttr': 2,\n 'count_bcac_SCF55': 1,\n 'count_bcac_SCF54': 1,\n 'plot_gini_theta_all': 2,\n 'studies': 1,\n 'ci.plot': 1,\n 'treat.mat': 3,\n 'subst_SCG01': 1,\n 'ALSOS.MDPREF': 1,\n 'subst_SCG03': 1,\n 'subst_SCG04': 1,\n 'subst_SCG05': 1,\n 'subst_SCG06': 1,\n 'subst_SCG07': 1,\n 'diff.cates.know.therm': 1,\n 'iterateObs': 4,\n 'varfun.Zhong.1': 4,\n 'varfun.Zhong.2': 4,\n 'wicked': 10,\n 'onetoneg1': 1,\n 'los2012': 1,\n 'X.SIMS.2K': 2,\n 'lbstr': 1,\n 'alpha.useresps.big.median': 3,\n 'count_cuke_SCG62': 1,\n 'output.file': 5,\n 'dat4': 2,\n 'dat3': 2,\n 'dat2': 3,\n 'type5': 2,\n 'italy_box_y': 12,\n 'italy_box_x': 12,\n 'obj.hqnum.bh.labs': 2,\n 'prior.limits': 4,\n 'safelog': 3,\n 'summ_loop': 2,\n 'bias.bilogit': 1,\n 'alpha.useresps.small.median': 3,\n 'f.word.wrap': 4,\n 'auditfo': 8,\n 'type3': 2,\n 'outflows2000': 2,\n 'hidden_env': 10,\n 'type2': 2,\n 'predicted_covariates': 4,\n 'm5s.cln.num_threads': 1,\n 'datm': 3,\n 'dati': 2,\n 'data': 95,\n 'ProbTotUnscal': 1,\n 'ss': 2,\n 'r2D': 1,\n 'sm': 1,\n 'number.of.national.candidates': 1,\n 'count_olyt_SCF50': 1,\n 'count_olyt_SCF51': 1,\n 'count_olyt_SCF52': 1,\n 'count_olyt_SCF53': 1,\n 'r2S': 1,\n 'Probs': 1,\n 'se': 10,\n 'FNR': 2,\n 'exp1means': 1,\n 'LIP': 1,\n 'reg_runner_news': 3,\n 'cor_test': 3,\n 'high.sch.grad': 2,\n 'bipartisan': 1,\n 'any_mobil_glm': 1,\n 'memodel': 1,\n 'civilian': 1,\n 'matches': 2,\n 'estTable': 46,\n 'cum_recall': 2,\n 'figA3a': 1,\n 'figA3b': 1,\n 'other_scandal': 1,\n 'maxperiods': 2,\n 'dat.a.bayes.out': 11,\n 'test_accuracy': 1,\n 'retrieve': 8,\n 'mmusl12': 7,\n 'match_other': 1,\n 'summary.spatpredmodel': 2,\n 'percent.bal': 2,\n 'process_depress': 1,\n 'pmean': 1,\n 'es.any.c.labs': 2,\n 'match_other_covered': 1,\n 'theme_bw_finegrid_horizontal': 2,\n 'cate.hat.pt': 1,\n 'VZA_FN': 3,\n 'leg_two': 1,\n 'sum.refuse.shia': 1,\n 'H4_plot': 3,\n 'biomass_copp_SCF60b': 1,\n 'rep_dir_tv': 4,\n 'removalyear': 8,\n 'ATE': 5,\n 'datnew': 1,\n 'load_packages': 5,\n 'grid_balanced': 1,\n 'pred_turnout_pride1': 1,\n 'calc.txs.ols': 5,\n 'logist': 3,\n 'SynthLeaveOneOutPrep': 1,\n 'MIR_to_pred_multifolds': 7,\n 'D.prior.means': 2,\n 'bauer': 12,\n 'rat.sup': 3,\n 'outvars.b.labs': 2,\n 'SynthRMSPE': 1,\n 'get_entropy': 1,\n 'count_casc_SCG53b': 1,\n 'count_casc_SCG53a': 1,\n 'phi2': 1,\n 'plot.ps.spatmodel': 19,\n 'Clustered_covariance_estimate': 16,\n 'start1_time': 2,\n 'roll_forward_predict_v2': 3,\n 'importModels': 24,\n 'Kern': 4,\n 'biomass_ocwf_SCF06': 1,\n 'biomass_ocwf_SCF05': 1,\n 'biomass_ocwf_SCF04': 1,\n 'biomass_ocwf_SCF03': 1,\n 'biomass_ocwf_SCF02': 1,\n 'biomass_ocwf_SCF01': 1,\n 'theme_pub': 1,\n 'FKnon': 1,\n 'diagnostic_metrics': 1,\n 'n.civilwars': 6,\n 'recode_values_all': 1,\n 'correl.se': 1,\n 'dist_to_use': 3,\n 'trans.trans': 2,\n 'type.num': 3,\n 'mydata': 7,\n 'Select2': 5,\n 'save_data_file': 16,\n 'path_to_data': 1,\n 'belief.expand.tri': 10,\n 'mod.mar08.biv': 1,\n 'predict.sp.duration': 1,\n 'figure_1': 1,\n 'pNode': 3,\n 'boot.n': 1,\n 'count_rock_SCG50': 1,\n 'count_rock_SCG51': 1,\n 'end_time': 1,\n 'count_rock_SCG56': 1,\n 'count_rock_SCG57': 1,\n 'count_rock_SCG58': 1,\n 'confint4': 16,\n 'confint3': 16,\n 'confint2': 16,\n 'confint1': 16,\n 'scale_fill_soa': 2,\n 'summary.corpus': 6,\n 'clean.ML2fieldsNA': 4,\n 'diff.cates.know': 1,\n 'robins.ci': 5,\n 'pcurve_estimate_d_CI': 25,\n 'mat_2': 1,\n 'mat_3': 1,\n 'mat_1': 1,\n 'count_copp_SCG02': 1,\n 'count_copp_SCG03': 1,\n 'count_copp_SCG01': 1,\n 'count_copp_SCG06': 1,\n 'count_copp_SCG07': 1,\n 'count_copp_SCG04': 1,\n 'count_copp_SCG05': 1,\n 'se_mean': 10,\n 's': 15,\n 'estimates2': 6,\n 'loess_plot': 13,\n 'aggdata': 3,\n 'SpecMeanTestBoot': 3,\n 'obj.lfstruct.wh.labs': 2,\n 'varfun.Risen.2': 4,\n 'varfun.Risen.1': 4,\n 'box.test': 1,\n 'collidev': 1,\n 'west': 2,\n 'fix_directories': 5,\n 'rd.placebo': 1,\n 'LIRT': 10,\n 'biomass_sqsp_SCF06': 1,\n 'biomass_sqsp_SCF05': 1,\n 'biomass_sqsp_SCF04': 1,\n 'biomass_sqsp_SCF03': 1,\n 'biomass_sqsp_SCF02': 1,\n 'biomass_sqsp_SCF01': 1,\n 'euclidean.distancer': 1,\n 'avg.cu': 4,\n 'shade.color': 1,\n 'get.ESCI': 4,\n 'analysisFiles': 17,\n 'DRDpvalue': 2,\n 'first.res': 1,\n 'splitdel': 1,\n 'ar2PM': 2,\n 'type.short': 3,\n 'll_ensemble_weights': 40,\n 'DID': 1,\n 'my.sims': 2,\n 'blah': 1,\n 'tableFitStatistics': 1,\n 'getReducedForm': 31,\n 'mean_mse_uds': 3,\n 'cluster_se_glm': 2,\n 'gen.tab_nic': 1,\n 'papcr': 1,\n 'varlabels': 3,\n 'mean_mse_udd': 3,\n 'count_cuke_SCG04': 1,\n 'count_cuke_SCG05': 1,\n 'count_cuke_SCG06': 1,\n 'count_cuke_SCG07': 1,\n 'count_cuke_SCG01': 1,\n 'count_cuke_SCG02': 1,\n 'count_cuke_SCG03': 1,\n 'I': 2,\n 'count_sqsp_SCF52': 1,\n 'count_sqsp_SCF51': 1,\n 'count_sqsp_SCF50': 1,\n 'count_sqsp_SCF55': 1,\n 'count_sqsp_SCF54': 1,\n 'sum.refuse.shia.pooled': 1,\n 'calc_mse': 3,\n 'amplifyRanksum': 2,\n 'summary.psbayes': 4,\n 'm.move.both': 8,\n 'NAB': 1,\n 'ups_estimate_seats': 1,\n 'NAA': 1,\n 'setArial': 5,\n 'd.full2': 1,\n 'pred_activism_both0': 1,\n 'count_bcac_SCG07': 1,\n 'sum1': 2,\n 'H7_10': 3,\n 'prezvote': 13,\n 'biomass_sdb_SCF02': 1,\n 'biomass_sdb_SCF03': 1,\n 'med.int.pict': 7,\n 'biomass_sdb_SCF06': 1,\n 'total_fish': 1,\n 'biomass_sdb_SCF04': 1,\n 'biomass_sdb_SCF05': 1,\n 'subst_SCF52': 1,\n 'coeftest.cluster': 2,\n 'res2': 1,\n 'res3': 1,\n 'Tmax': 1,\n 'ntarg': 2,\n 'res4': 1,\n 'p.vals.pt': 1,\n 'count_rock_SCG05': 1,\n 'xaxs': 3,\n 'es.num.a.labs': 2,\n 'count_hfbd_SCF60b': 1,\n 'count_rock_SCG03': 1,\n 'theme_slopegraph': 1,\n 'gop_2016_delta': 5,\n 'obstruct': 1,\n 'varfun.Ross.1': 4,\n 'varfun.Ross.2': 4,\n 'qaic': 2,\n 'lm1.1': 1,\n 'cascade.first.use': 7,\n 'dfRelPerception': 1,\n 'balFun': 1,\n 'leadership': 3,\n 'unique.strings': 2,\n 'cate.hat.lowedu.therm': 1,\n 'sp.loglogistic': 1,\n 'sage.infer.gamma': 4,\n 'd_logOR': 4,\n 'Nl': 1,\n 'ranvifset': 1,\n 'Nb': 3,\n 'Nc': 1,\n 'simresults': 1,\n 'Np': 2,\n 'compare.paths.bw.vars': 2,\n 'Nt': 1,\n 'Nw': 3,\n 'beta.usesens.small': 3,\n 'varfun.Rottenstreich.1': 4,\n 'striptextsize': 2,\n 'NB': 1,\n 'w2008': 1,\n 'max_lag_AR': 3,\n 'r1Slim': 1,\n 'w2006': 1,\n 'w2007': 1,\n 'mean.to.odds': 5,\n 'complete': 5,\n 'ind.upper': 3,\n 'parreg_labels': 3,\n 'N1': 3,\n 'N2': 3,\n 'with': 3,\n 'LDAIRTcpp': 9,\n 'dfRec': 1,\n 'dfRel': 2,\n 'count_cash_SCG52a': 1,\n 'count_cash_SCG52b': 1,\n 'aa': 2,\n 'theta.betaX.d.srE.z': 4,\n 'inter.binning': 2,\n 'most_tox': 2,\n 'sar.lik': 1,\n 'as': 1,\n 'at': 1,\n 'find_best_arch_model': 1,\n 'getFirstStage': 31,\n 'bartTest': 1,\n 'make.anova.table': 5,\n 'my.cov': 1,\n 'upperAngle': 5,\n 'reldata': 8,\n 'count_total_SCG01': 1,\n 'm.worker.both': 8,\n 'latlong2state': 1,\n 'Xb.cont': 3,\n 'count_total_SCG06': 1,\n 'standardize': 2,\n 'congress': 2,\n 'scare.hi': 2,\n 'a1': 7,\n 'a3': 8,\n 'a2': 7,\n 'a5': 7,\n 'a4': 7,\n 'a6': 7,\n 'counteqlow': 8,\n 'subNIT2': 5,\n 'p.at': 1,\n 'Bias': 1,\n 'cps': 4,\n 'SynthErrorRatio': 1,\n 'diff.cate.know.2.0': 1,\n 'Rep.STD.IRT.mean': 5,\n 'tt': 3,\n 'overlapCross.core': 6,\n 'tr': 10,\n 'ts': 2,\n 'count_lcod_SCF60b': 1,\n 'count_lcod_SCF60c': 1,\n 'd.att': 1,\n 'count_lcod_SCF60a': 1,\n 'tk': 2,\n 'full.me': 1,\n 'WLS.est': 21,\n 'obj.lfstruct.wb.labs': 2,\n 'initIO': 5,\n 'Flight_min': 3,\n 'impute_aid_inschool': 1,\n 'alpha.usesens.big': 3,\n 't2': 3,\n 't3': 1,\n 't0': 2,\n 't1': 4,\n 'adjust': 13,\n 'exp2table': 1,\n 'pastmax': 3,\n 'yboot.ineff': 2,\n 'diffmar': 1,\n 'recode_demographic_values': 2,\n 'Profile_ymax': 3,\n 'plot_slopegraph': 1,\n 'dir.output': 2,\n 'pred_fun_defor': 4,\n 'full': 4,\n 'plot_semcoh_all': 2,\n 'balance_tests': 5,\n 'richness_SCG53b': 1,\n 'richness_SCG53a': 1,\n 'CAL_list2': 2,\n 'CAL_list3': 2,\n 'post.a.labs': 2,\n 'removeVarFromFormula': 31,\n 'chi.antiamer.pooled': 1,\n 'qualtrics': 1,\n 'rhoval': 1,\n 'activistData': 2,\n 'label.anova.table': 5,\n 'activistDati': 2,\n 'predmanGAM': 2,\n 'bill_endings': 13,\n 'Iv.reg': 1,\n 'online.data': 2,\n 'TuTv': 4,\n 'count_total_SCG58': 1,\n 'kstar': 15,\n 'count_total_SCG50': 1,\n 'count_total_SCG51': 1,\n 'ScatterW_FN': 3,\n 'count_total_SCG55': 1,\n 'quantile.by.value.all.changes': 2,\n 'count_total_SCG57': 1,\n 'larger.se': 3,\n 'mat2list': 3,\n 'biomass_sdb_SCF60c': 1,\n 'biomass_sdb_SCF60b': 1,\n 'int_l': 6,\n 'str_wrap': 4,\n 'int_e': 6,\n 'int_b': 6,\n 'endpoint': 17,\n 'r2': 18,\n 'g_color_hue': 2,\n 'cate.non': 1,\n 'male': 2,\n 'S110.KH': 17,\n 'build.recursion': 2,\n 'lightbrown': 4,\n 'states': 5,\n 'newmodel': 1,\n 'wave2': 4,\n 'myID': 5,\n 'biomass_sqsp_SCF60b': 1,\n 'biomass_sqsp_SCF60c': 1,\n 'biomass_sqsp_SCF60a': 1,\n 'es.con.a.labs': 2,\n 'conj.data.list': 1,\n 'm6n': 7,\n 'yrdrop': 2,\n 'allgrabsSOx': 1,\n 'treatments': 2,\n 'felm_loop_figure': 14,\n 'biomass_olyt_SCF01': 1,\n 'biomass_olyt_SCF02': 1,\n 'biomass_olyt_SCF03': 1,\n 'biomass_olyt_SCF04': 1,\n 'biomass_olyt_SCF05': 1,\n 'biomass_olyt_SCF06': 1,\n 'graph2svg': 5,\n 'med.ext.pict': 7,\n 'mssamplea': 2,\n 'biomass_hfbd_SCF06': 1,\n 'biomass_hfbd_SCF04': 1,\n 'biomass_hfbd_SCF05': 1,\n 'biomass_hfbd_SCF02': 1,\n 'biomass_hfbd_SCF03': 1,\n 'biomass_hfbd_SCF01': 1,\n 'n.interim': 6,\n 'thoughts.averaged': 11,\n 'fraga1_MajC_W': 1,\n 'installed': 1,\n 'vec4': 38,\n 'vec2': 38,\n 'vec3': 38,\n 'fraga1_MajC_L': 1,\n 'vec1': 38,\n 'fraga1_MajC_B': 1,\n 'fraga1_MajC_A': 1,\n 'roots': 2,\n 'cdresults': 1,\n 'join_user_activity_df': 8,\n 'allYield': 2,\n 'get.fieldAdd': 4,\n 'g_stage1': 1,\n 'g_stage2': 1,\n 'vecn': 1,\n 'f.jointindep': 2,\n 'biomass_prch_SCG01': 1,\n 'biomass_prch_SCG02': 1,\n 'biomass_prch_SCG03': 1,\n 'biomass_prch_SCG04': 1,\n 'biomass_prch_SCG05': 1,\n 'sen107': 5,\n 'biomass_prch_SCG07': 1,\n 'n.stepdown': 6,\n 'sen108': 5,\n 'dig3': 2,\n 'it2': 2,\n 'it0': 2,\n 'it1': 2,\n 'tab.2.a': 2,\n 'tab.2.b': 2,\n 'tab.2.c': 2,\n 'Y.treat.obs': 1,\n 'fCalcBootstrapCI': 1,\n 'trend.attitudes': 1,\n 'sigex.blocktoep': 2,\n 'surrounding.years': 2,\n 'JSTestRand': 11,\n 'n_protestyear_unarmed': 2,\n 'feelangry': 4,\n 'sim_mn': 2,\n 'reduce': 11,\n 'obj.quote.a': 2,\n 'obj.quote.b': 2,\n 'obj.quote.c': 2,\n 'outwidths': 2,\n 'obj.show.wb.labs': 2,\n 'usremodel': 1,\n 'boot.policies': 1,\n 'boot_errors': 2,\n 'count_lbstr_SCG52b': 1,\n 'airquality': 1,\n 'count_ocwf_SCG52a': 1,\n 'count_ocwf_SCG52b': 1,\n 'perceqlower': 8,\n 'i.null': 2,\n 'SHA': 1,\n 'SHB': 1,\n 'P_standard_CAN': 1,\n 'varfun.Giessner.1': 4,\n 'outparty.ft2': 2,\n 'Years': 2,\n 'print.estTable': 46,\n 'correct.order': 1,\n 'w': 12,\n ...}"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unique, counts = np.unique(all_vars, return_counts=True)\n",
    "dict(zip(unique, counts))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_vars = list(filter(lambda a: a != 'r', all_vars))\n",
    "all_vars = list(filter(lambda a: a != 'R', all_vars))\n",
    "all_vars = list(filter(lambda a: a != 'division', all_vars))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "['close_lo',\n 'soph_lo',\n 'sophsq',\n 'pid_lo',\n 'general_means',\n 'xes',\n 'other_voter_high',\n 'repsen_voter_high',\n 'demsen_voter_high',\n 'rephou_voter_low',\n 'other_se_tab',\n 'othersen_voter_prob',\n 'close',\n 'soph_hi',\n 'demhou_voter_high',\n 'pid_values',\n 'otherhou_voter_prob',\n 'rephou_voter_prob',\n 'o_lo',\n 'repsen_voter_low',\n 'demsen_voter_prob',\n 'ticks',\n 'barCenters',\n 'clinton_voter_high',\n 'rephou_voter_high',\n 'o_hi',\n 'demsen_voter_low',\n 'close_hi',\n 'otherhou_voter_high',\n 'otherhou_voter_low',\n 'primary_se',\n 'primary_means',\n 'repsen_voter_prob',\n 'trump_voter_high',\n 'clinton_voter_low',\n 'other_mean_tab',\n 'othersen_voter_high',\n 'other_voter_prob',\n 'trump_voter_low',\n 'general_se',\n 'obama',\n 'demhou_voter_prob',\n 'trump_voter_prob',\n 'i',\n 'other_voter_low',\n 'othersen_voter_low',\n 'clinton_voter_prob',\n 'demhou_voter_low',\n 'pid_hi',\n 'dt',\n 'dt',\n 'dt',\n 'dt',\n 'dt',\n 'dt',\n 'dt',\n 'dt',\n 'multiplot',\n 'format_raw_data',\n 'rearrange_data',\n 'multiplot',\n 'dt',\n 'lowerbound',\n 'upperbound',\n 'dat.a.bayes.out',\n 'lowerbound',\n 'upperbound',\n 'dat.a.bayes.out',\n 'lowerbound',\n 'upperbound',\n 'dat.a.bayes.out',\n 'lowerbound',\n 'ep.wf.negbin.b',\n 'upperbound',\n 'dat.a.bayes.out',\n 'mnegbin.varj.jags',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxlightblue',\n 'oxred',\n 'upperbound',\n 'oxblue',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'dat.a.bayes.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxlightblue',\n 'oxred',\n 'upperbound',\n 'oxblue',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'dat.a.bayes.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxblue',\n 'oxred',\n 'upperbound',\n 'dat.a.bayes.out',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'oxlightblue',\n 'dat.a.wf.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxblue',\n 'oxred',\n 'upperbound',\n 'dat.a.bayes.out',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'oxlightblue',\n 'dat.a.wf.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxblue',\n 'oxred',\n 'upperbound',\n 'dat.a.bayes.out',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'oxlightblue',\n 'dat.a.wf.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxblue',\n 'oxred',\n 'upperbound',\n 'dat.a.bayes.out',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'oxlightblue',\n 'dat.a.wf.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'lowerbound',\n 'oxyellow',\n 'ep.wf.negbin.b',\n 'oxblue',\n 'oxred',\n 'wfm.filter',\n 'upperbound',\n 'dat.a.bayes.out',\n 'oxorange',\n 'oxdarkgreen',\n 'oxlightgreen',\n 'oxlightblue',\n 'dat.a.wf.out',\n 'mnegbin.varj.jags',\n 'oxgrey',\n 'theme_bw_finegrid_finer',\n 'theme_bw_finegrid_special',\n 'theme_bw_finegrid_horizontal_minor',\n 'theme_bw_finegrid_horizontal_fine_x',\n 'theme_bw_finegrid',\n 'theme_bw_finegrid_horizontal',\n 'theme_bw_finegrid_finer',\n 'theme_bw_finegrid_special',\n 'theme_bw_finegrid_horizontal_minor',\n 'summarySE',\n 'theme_bw_finegrid_horizontal_fine_x',\n 'theme_bw_finegrid',\n 'theme_bw_finegrid_horizontal',\n 'root',\n 'root',\n 'root',\n 'inter.binning.90',\n 'inter.binning.90',\n 'multiplot',\n 'root',\n 'inter.binning.90',\n 'multiplot',\n 'ols',\n 'root',\n 'inter.binning.90',\n 'multiplot',\n 'ols',\n 'root',\n 'inter.binning.90',\n 'multiplot',\n 'ols',\n 'root',\n 'wordfish_rank_plot_apriori_ordering',\n 'outparty.ft',\n 'sameparty.ft',\n 'pid',\n 'female',\n 'window5',\n 'finish.date',\n 'naes',\n 'days.cubed',\n 'outparty.ft2',\n 'julyfourth',\n 'window1',\n 'window7',\n 'jm.therm2',\n 'jm.therm3',\n 'days.squared',\n 'black',\n 'income',\n 'white',\n 'days.away',\n 'bho.therm3',\n 'bho.therm2',\n 'window3',\n 'educ',\n 'weeks.away',\n 'age',\n 'sp',\n 'wk.away',\n 'hispanic',\n 'dem',\n 'window14',\n 'window10',\n 'sv.exp',\n 'bias.exp',\n 'bias2.exp',\n 'bias50',\n 'pups_estimate_seats',\n 'bias2.bilogit',\n 'bias.bilogit',\n 'eg',\n 'sv.bilogit',\n 'ups_estimate_seats',\n 'lopsided.outcomes',\n 'bias',\n 'sv.step',\n 'sv.hyp',\n 'rep.row',\n 'resp',\n 'declination',\n 'args',\n 'args',\n 'args',\n 'kFigureWidth',\n 'args',\n 'SAVEDIR',\n 'GIGANTEA_R',\n 'Temp',\n 'GIGANTEA_R',\n 'Methodcode',\n 'Nl',\n 'Lowerlimit',\n 'GIGANTEA_cpp',\n 'Missing',\n 'Referencevalues',\n 'Geno',\n 'Freevec',\n 'Parametername',\n 'Np',\n 'Input',\n 'Samplingpoint',\n 'Y',\n 'Upperlimit',\n 'percent.bal',\n 'ks.fast',\n 'Mks.test',\n 'hl.rbound',\n 'lower.bound',\n 'pscore.eval',\n 'ksboot',\n 'ttest',\n 'pvalue',\n 'percent.bal',\n 'ks.fast',\n 'Mks.test',\n 'hl.rbound',\n 'lower.bound',\n 'pscore.eval',\n 'ksboot',\n 'ttest',\n 'pvalue',\n 'cached_data_file',\n 'raw_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'z',\n 'y',\n 'dat',\n 'p',\n 'lev',\n 'x',\n 'xy',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'z',\n 'y',\n 'dat',\n 'p',\n 'lev',\n 'x',\n 'xy',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'z',\n 'y',\n 'dat',\n 'p',\n 'lev',\n 'x',\n 'xy',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'z',\n 'y',\n 'dat',\n 'p',\n 'lev',\n 'x',\n 'xy',\n 'save_data_file',\n 'cached_data_file',\n 'raw_data_file',\n 'sd_mult',\n 'df_sd_mult',\n 'sd_add',\n 'df_sd_add',\n 'g_per_s_to_m3_per_day',\n 'm3_per_L',\n 'M3_per_cubic_foot',\n 'sd_mult',\n 'T_standard_ISO',\n 'T_standard_USA',\n 'df_sd_mult',\n 'T_standard_CAN',\n 'sd_add',\n 'bulk_slpm_METEC_to_g_per_s_CH4',\n 'g_per_s_to_scfh',\n 'CH4_molar_mass',\n 'df_sd_add',\n 'P_standard_CAN',\n 'P_standard_ISO',\n 'P_standard_USA',\n 'list.of.packages',\n 'new.packages',\n 'list.of.packages',\n 'new.packages',\n 'list.of.packages',\n 'new.packages',\n 'list.of.packages',\n 'new.packages',\n 'list.of.packages',\n 'new.packages',\n 'gof_files',\n 'final_dta',\n 'model_path',\n 'model_files',\n 'gof_path',\n 'index_id',\n 'list.of.packages',\n 'new.packages',\n 'gof_files',\n 'final_dta',\n 'model_path',\n 'model_files',\n 'gof_path',\n 'index_id',\n 'list.of.packages',\n 'new.packages',\n 'reg_runner_news_rev_levels',\n 'reg_runner_news',\n 'reg_runner_rev',\n 'reg_runner_alt',\n 'reg_runner_news_1884',\n 'reg_runner_news_change',\n 'reg_runner_resid',\n 'reg_runner_news_rev_levels',\n 'reg_runner_news',\n 'reg_runner_rev',\n 'reg_runner_alt',\n 'reg_runner_news_1884',\n 'reg_runner_news_change',\n 'reg_runner_resid',\n 'reg_runner_news_rev_levels',\n 'reg_runner_news',\n 'reg_runner_rev',\n 'reg_runner_alt',\n 'reg_runner_news_1884',\n 'reg_runner_news_change',\n 'reg_runner_resid',\n 'PSPS_M_weighting_SA',\n 'PS_pred',\n 'PredTreatEffect_SA',\n 'PSPS_M_weighting',\n 'PSPS_M_weighting_SA',\n 'PredTreatEffect_SA',\n 'PSPS_M_weighting_SA',\n 'PSPS_SM_weighting',\n 'PS_pred',\n 'PredTreatEffect_SA',\n 'PSPS_M_weighting',\n 'PSPS_M_weighting_SA',\n 'PSPS_SM_weighting',\n 'PS_pred',\n 'PredTreatEffect_SA',\n 'PSPS_M_weighting',\n 'theme_base_edited',\n 'aggregate.matrix',\n 'vcovCluster',\n 'inter.kernel',\n 'inter.binning',\n 'inter.binning.lmer',\n 'inter.gam',\n 'crossvalidate',\n 'inter.raw',\n 'coefs',\n 'altKrigCRF',\n 'shade.color',\n 'years',\n 'x.useresp.small',\n 'VOTES.RESP.SMALL',\n 'VOTES.SEN.SMALL',\n 'h110',\n 'CCES2008',\n 's110',\n 'x.usesen.small',\n 'x.useall.small',\n 'z.110',\n 'H110.KH',\n 'x.usehouse.small',\n 'CCES08',\n 'S110.KH',\n 'VOTES.HOUSE.SMALL',\n 'x.useresp.small',\n 'VOTES.RESP.SMALL',\n 'VOTES.SEN.SMALL',\n 'h110',\n 'CCES2008',\n 's110',\n 'x.usesen.small',\n 'x.useall.small',\n 'z.110',\n 'H110.KH',\n 'x.usehouse.small',\n 'CCES08',\n 'S110.KH',\n 'VOTES.HOUSE.SMALL',\n 'smsd.saved.objects',\n 'x.useresp.small',\n 'VOTES.RESP.SMALL',\n 'VOTES.SEN.SMALL',\n 'h110',\n 'CCES2008',\n 's110',\n 'x.usesen.small',\n 'x.useall.small',\n 'z.110',\n 'H110.KH',\n 'x.usehouse.small',\n 'CCES08',\n 'S110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'x.usehouse.prezid',\n 'alpha.usesen.small.median',\n 'prezvote.probit.usesens.pid',\n 'x.useall.small',\n 'alpha.useresp.small',\n 's110',\n 'beta.useall.small',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.usesen.prezid',\n 'H110.KH',\n 'x.usehouse.small',\n 'prezprobit.usehouse',\n 'nsen',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 'x.useresp.prezid',\n 'smsd.saved.objects',\n 'sMSD',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'VOTES.HOUSE.SMALL',\n 'alpha.useresp.small.median',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 'beta.usehouse.small',\n 'prezvote.probit.useresps.pid',\n 'S110.KH',\n 'prezprobit.useresp',\n 'smsd.saved.objects',\n 'x.useresp.small',\n 'VOTES.RESP.SMALL',\n 'VOTES.SEN.SMALL',\n 'h110',\n 'CCES2008',\n 's110',\n 'x.usesen.small',\n 'x.useall.small',\n 'z.110',\n 'H110.KH',\n 'x.usehouse.small',\n 'CCES08',\n 'S110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'x.usehouse.prezid',\n 'beta.useall.small',\n 'prezvote.probit.usesens.pid',\n 'x.useall.small',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.usesen.prezid',\n 'H110.KH',\n 'x.usehouse.small',\n 'alpha.usesen.small.median',\n 'nsen',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 'x.useresp.prezid',\n 'smsd.saved.objects',\n 'sMSD',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'VOTES.HOUSE.SMALL',\n 'alpha.useresp.small.median',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'x.usehouse.prezid',\n 'beta.useall.small',\n 'prezvote.probit.usesens.pid',\n 'x.useall.small',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.usesen.prezid',\n 'H110.KH',\n 'x.usehouse.small',\n 'alpha.usesen.small.median',\n 'nsen',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 'x.useresp.prezid',\n 'smsd.saved.objects',\n 'sMSD',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'VOTES.HOUSE.SMALL',\n 'alpha.useresp.small.median',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'alpha.usehouse.small.median',\n 'xs.500',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n 'xs.2000',\n 'beta.useall.small',\n 'hetIRT.sims.list.500',\n 'hetIRT.sims.list.111',\n 'y.resp.nonnumeric',\n 'alpha.useresp.small',\n 's110',\n 'DAT',\n 'x.usesen.small',\n 'beta.useresp.small',\n 'alpha.useall.small.median',\n 'prezprobit.usesen',\n 'x.useresp.prezid',\n 'x.usesen.prezid',\n 'H110.KH',\n 'VOTES.HOUSE.SMALL',\n 'alpha.usesen.small.median',\n 'nsen',\n 'xs.all',\n 'VOTES.RESP.SMALL',\n 'nhouse',\n 's.all',\n 'smsd.saved.objects',\n 'sMSD',\n 'hetIRT.sims.list.2k',\n 'prezprobit.usehouse',\n 'pid3',\n 'prezvote',\n 'CCES08',\n 'prezvote.probit.usesens.pid',\n 'alpha.useresp.small.median',\n 'i',\n 'x.usehouse.small',\n 'z.110',\n 'CCES2008',\n 'beta.usesen.small',\n 'survey.wd',\n 'alpha.useall.small',\n 'alpha.usehouse.small',\n 's.500',\n 'beta.usehouse.small',\n 'y.resp',\n 'prezvote.probit.useresps.pid',\n 's.2000',\n 'S110.KH',\n 'prezprobit.useresp',\n 'alpha.usesen.small',\n 'prezvote.probit.usehouse.pid',\n 'x.useresp.small',\n 'VOTES.SEN.SMALL',\n 'xs.111',\n 'nresp',\n 'h110',\n 'x.useresps.small',\n 'alpha.usehouse.small.median',\n 'x.usesens.small.hard',\n 's.111',\n 'x.useall.small',\n 'x.usehouse.prezid',\n ...]"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "10"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_avg = sum( map(len, all_vars) ) / len(all_vars)\n",
    "total_avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "1974\n"
    }
   ],
   "source": [
    "onetwo = 0\n",
    "\n",
    "for a in all_vars:\n",
    "    if len(a) <3:\n",
    "        onetwo+=1\n",
    "        \n",
    "print(onetwo)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "         division;R;r\n3652                                  division;R;r;select\n3653                                  division;R;r;select\n3654                                  division;R;r;select\n3655    collidev;division;PositionDodgeV;vcovCluster;g...\n3656                                          bump;bounce\n3658                                         division;R;r\n3661                                         division;R;r\n3662                                         division;R;r\n3663                      division;resample;R;clse;stan;r\n3664                      division;resample;R;clse;stan;r\n3665                      division;resample;R;clse;stan;r\n3666                      division;resample;R;clse;stan;r\n3667                      division;resample;R;clse;stan;r\n3668                      division;resample;R;clse;stan;r\n3672                                         division;R;r\n3673                                         division;R;r\n3674                                         division;R;r\n3675                                         division;R;r\n3676                                         division;R;r\n3677                                         division;R;r\n3678                                         division;R;r\n3679                                         division;R;r\n3680                                         division;R;r\n3681                                         division;R;r\n3682                                         division;R;r\n3683                                         division;R;r\n3684                                         division;R;r\n3685                                         division;R;r\n3686                                         division;R;r\n3687                                         division;R;r\n3688                                         division;R;r\n3689                                         division;R;r\n3690                                         division;R;r\n3691                                                   pa\n3694                                         division;R;r\n3696                                         division;R;r\n3697                                         division;R;r\n3698                                         division;R;r\n3699                                         division;R;r\n3700                                         division;R;r\n3701                                         division;R;r\n3704                                         division;R;r\n3707                                         division;R;r\n3708                                   division;R;r;mturk\n3711                                         division;R;r\n3712       amplifyRanksum;amplifySignrank;amplifyObserved\n3713    division;amplifyUnobserved;amplifyObserved;r;R...\n3717                                         division;R;r\n3719                      univlist;x;u;output.table;x.sub\n3720    division;ATE;x.sub;tab.data;univlist;tabulate....\n3721    division;ATE;x.sub;tab.data;univlist;tabulate....\n3722    ATE;sdata;tabulate.data;get.perm.pval;tab.data...\n3723    ATE.s.hat;ATE;sdata;pvals;get.perm.pval;get.pe...\n3724    ATE.s.hat;ATE;sdata;pvals;x.reject;get.perm.pv...\n3726                                   meff;barCenters;se\n3729    Liberia;Sierra_Leone;Kosovo;CDI;Years;Burundi;...\n3731                                         division;R;r\n3732                                         division;R;r\n3733                                         division;R;r\n3734                                         division;R;r\n3735                                         division;R;r\n3736                                         division;R;r\n3737                                         division;R;r\n3740                 continuous_margeff_plot;R;r;division\n3741    division;plot_dat23;continuous_margeff_plot;pl...\n3742    division;dime;plot_dat23;continuous_margeff_pl...\n3743    state;division;dime;plot_dat23;continuous_marg...\n3744    state;division;dime;plot_dat23;continuous_marg...\n3745    state;division;dime_cont;dime;plot_dat23;conti...\n3746    state;division;dime_cont;dime;plot_dat23;conti...\n3747    sle_sm_dat;division;state;dime_cont;dime;plot_...\n3748    sle_sm_dat;division;state;dime_cont;sm_ind;dim...\n3749    datacfr;fv;most_tox;pre_61;datacfr1;model_1;be...\n3751                        division;R;r;gen.outcome.vars\n3752                        division;R;r;gen.outcome.vars\n3753          division;r;gen.pop.stats;gen.outcome.vars;R\n3754    division;gen.outcome.vars;gen.treatment;r;R;ge...\n3756                                         division;R;r\n3757                                         division;R;r\n3758                                         division;R;r\n3759                                         division;R;r\n3761                                         division;R;r\n3762                                         division;R;r\n3764                                         division;R;r\n3765                                         division;R;r\n3766    sum.diff.shia;mod.ind.extra1;chi.policies.pool...\n3767    remrecogs;remwars;wars;capwins;pre.remrecogs;r...\n3769                                         division;R;r\n3770                                         division;R;r\n3771                                         division;R;r\n3773                                         division;R;r\n3774                                         division;R;r\n3775    parp.impulse;wald;parpfilter;pewmallf;parp.mul...\n3776    wald;pewmallf;parp.multipliers;pewmafilter;par...\n3777    wald;pewmallf;parp.multipliers;pewmafilter;par...\n3778    dat4b;dat4a;dat2a;dat2b;dat4;dat3;dat2;dat;dat...\n3779    dat4b;division;dat4a;dat2a;dat2b;dat4;dat3;dat...\n3783                                         division;R;r\n3786                                         division;R;r\n3787                                         division;R;r\n3789                                         division;R;r\n3790                       division;r;joint.iv;ratio.se;R\n3791                       division;r;joint.iv;ratio.se;R\n3792                       division;r;joint.iv;ratio.se;R\n3793                       division;r;joint.iv;ratio.se;R\n3794                       division;r;joint.iv;ratio.se;R\n3797                                         division;R;r\n3798    to_colorscale;extract_vars_heritable;plot_dist...\n3802                                         division;R;r\n3804                                         division;R;r\n3807                                           add_legend\n3809                                         division;R;r\n3810                                         division;R;r\n3811                                         division;R;r\n3813                                         division;R;r\n3818                                         division;R;r\n3819                                         division;R;r\n3820                                         division;R;r\n3821    control;figure;Nc;number;out.P2;Y;Np;Nt;my.cov...\n3823                                         division;R;r\n3824                                         division;R;r\n3830                              print.ict.test;ict.test\n3831                            division;nb.under.lik;r;R\n3833                                         division;R;r\n3834                            division;nb.under.lik;r;R\n3836                                         division;R;r\n3837                                         division;R;r\n3838                                         division;R;r\n3840                                         division;R;r\n3841                                         division;R;r\n3842                                         division;R;r\n3843                                     statafig1;fig1df\n3844                        division;R;r;statafig1;fig1df\n3845    keepvars;d;lib;dta_in;repos;args;in_lasso;inst...\n3847                                         division;R;r\n3848                                         division;R;r\n3849                                         division;R;r\n3850                                          Kern;Gdata1\n3851           division;Gdata1;MODULE.DIR;Kern;r;gen.af;R\n3852           division;Gdata1;MODULE.DIR;Kern;r;gen.af;R\n3853                      division;Gdata1;Kern;r;gen.af;R\n3855    yrdrop;all;py;off;noally;rs;dens;px;ntarg;targ...\n3856    rbeta.d.s.gibbs;rwish;theta.betaX.d.srE.z;rmvn...\n3857    division;rbeta.d.s.gibbs;XuXv;dd;theta.betaX.d...\n3858    division;rbeta.d.s.gibbs;XuXv;dd;theta.betaX.d...\n3860                                      SHA;SHB;gpA;gpB\n3866                                             res_nocv\n3867                         division;R;r;thetas;res_nocv\n3868                         division;R;r;thetas;res_nocv\n3869                         division;R;r;thetas;res_nocv\n3870                         division;R;r;thetas;res_nocv\n3871                         division;R;r;thetas;res_nocv\n3872                         division;R;r;thetas;res_nocv\n3873                         division;R;r;thetas;res_nocv\n3874                         division;R;r;thetas;res_nocv\n3875       dirs;division;wd;thetas;i;res_nocv;R;scripts;r\n3876       dirs;division;wd;thetas;i;res_nocv;R;scripts;r\n3877    dirs;division;wd;thetas;i;pred_plm;res_nocv;R;...\n3878    dirs;division;wd;thetas;i;pred_plm;res_nocv;R;...\n3879    dirs;division;wd;thetas;i;pred_plm;res_nocv;R;...\n3880    hr;data2A;data2B;data1A;data1B;other;data;data...\n3881    m2013;m2012;y1;y2;division;myvars;m1999;m1998;...\n3882    m2013;m2012;y1;y2;division;myvars;m1999;m1998;...\n3883    m2013;m2012;y1;y2;l2010;l2011;l2012;color;win2...\n3885                                         division;R;r\n3886                                         division;R;r\n3887                                         division;R;r\n3888                                         division;R;r\n3889                                         division;R;r\n3890                                         division;R;r\n3891    division;j;ses;plot_data;i;res_tab;ests;est_mo...\n3892    plot_data;j;ses;i;res_tab;est_mod4;est_mod3;es...\n3893    division;j;ses;plot_data;i;res_tab;ests;est_mo...\n3894    division;j;ses;plot_data;i;res_tab;ests;est_mo...\n3898                                         division;R;r\n3901    comment;division;R;m5s.cln.num_female_users;m5...\n3903                                         division;R;r\n3905                 division;R;r;loessplot;general_loess\n3906    it10;tm1;labels;i_lab;i1;i0;i2;t2;i10;t0;t_lab...\n3907    i_lab;labels;t_lab;it10;t10;itm1;itm9;division...\n3908    randomDGS;matchCond;orderFast;sortString;no.mo...\n3909    addStar;sortString;modifyStr;randomDGS;getSolu...\n3910    segmentor;simplifier;split_and_match;sum_over_...\n3911    simplifier;split_and_match;simple_cap;segmento...\n3912    simplifier;split_and_match;simple_cap;segmento...\n3913    simplifier;split_and_match;simple_cap;segmento...\n3914    simplifier;split_and_match;simple_cap;segmento...\n3915    simplifier;split_and_match;simple_cap;segmento...\n3916    simplifier;split_and_match;simple_cap;segmento...\n3917    simplifier;split_and_match;simple_cap;segmento...\n3918    scale_fill_soa;logmsg;simple_cap;segmentor;sim...\n3919    scale_fill_soa;logmsg;simple_cap;segmentor;sim...\n3920    summary.probit.seq;probit.seq;probit.nr;mi.res...\n3921    division;summary.probit.seq;probit.seq;probit....\n3922    division;summary.probit.seq;probit.seq;probit....\n3924         division;R;r;vcovCluster;abstentions.include\n3925         division;R;r;vcovCluster;abstentions.include\n3926    Pos.cut.off;Comm.risk.hosp;Data.gen;ARI.est;Do...\n3927    division;Pos.cut.off;Comm.risk.hosp;Data.gen;A...\n3928    IBM.prior.group;division;IBM.LL;IBM.posterior....\n3929    IBM.prior.group;division;IBM.LL;IBM.posterior....\n3930    Profile.shed;Data.gen;Set.sim;Dot.plot2;plot.b...\n3931    Profile.shed;Data.gen;Set.sim;Dot.plot2;plot.b...\n3933                                         division;R;r\n3934    gt;vol;Imatrix;volt;as;FC;Dep0;EBIT;n;rt;PV;P0...\n3935                                     C;i;P0;n;vol;P;r\n3936                           f2;f3;d;i;labs;yhi;ylo;cex\n3938                                    division;R;r;data\n3939              division;UUdata;R;SKdata;r;data;alldata\n3940              division;UUdata;R;SKdata;r;data;alldata\n3942    cluster_se_glm;get.conditional.effects;plot.am...\n3943    division;cluster_se_glm;get.conditional.effect...\n3944    add.stars;id.nic;bs;Round.To;gen.output.table;...\n3946                                         division;R;r\n3947                                         division;R;r\n3948                                         division;R;r\n3949                                         division;R;r\n3950                                         division;R;r\n3951                                         division;R;r\n3952                                         division;R;r\n3953                                         division;R;r\n3954                                         division;R;r\n3955    f.percent.debug;f.percent;f.lik.o.rebuild;f.in...\n3956    f.percent.debug;f.percent;f.lik.o.rebuild;f.in...\n3958                                         division;R;r\n3960    res.mean;betaFE;y.mean;master0;master;u.hat;se...\n3962                                         division;R;r\n3964                                         division;R;r\n3965                                         division;R;r\n3966                                      sample;c;data;t\n3967                                               asylum\n3968                                  division;r;asylum;R\n3969                     division;r;asylum;R;election2017\n3970                     division;r;asylum;R;election2017\n3971    division;fitconst;fitland;fitlandnominor;r;fit...\n3972    division;fitconst;fitland;fitlandnominor;r;fit...\n3974                                         division;R;r\n3976                                     dat;division;R;r\n3977    dat_life;dat_gdp;division;m;dat;R;r;columnslif...\n3978    dat_life;dat_gdp;division;m;dat;R;r;columnslif...\n3979    dat_life;dat_gdp;division;m;dat;R;r;columnslif...\n3980    dat_life;dat_gdp;division;m;dat;R;r;columnslif...\n3988                                         division;R;r\n3990                                         division;R;r\n3992                                         division;R;r\n3993                                                  dat\n3996                                         division;R;r\n3997                                         division;R;r\n3998                                         division;R;r\n3999                                         division;R;r\n4000                                         division;R;r\n4001                                         division;R;r\n4002                                         division;R;r\n4003                                         division;R;r\n4004                                         division;R;r\n4005                                         division;R;r\n4006                                         division;R;r\n4007                                         division;R;r\n4008                                         division;R;r\n4009                                         division;R;r\n4010                                         division;R;r\n4011                                         division;R;r\n4012    randomSocialNetwork;addSCNet;updateNetwork;act...\n4013    division;randomSocialNetwork;addSCNet;r;update...\n4014    division;randomSocialNetwork;addSCNet;r;update...\n4015    division;randomSocialNetwork;addSCNet;r;update...\n4017                                    division;R;r;data\n4018                                    division;R;r;data\n4019                                              workdir\n4020       division;workdir;startYear;endYear;r;R;numboot\n4021       division;workdir;startYear;endYear;r;R;numboot\n4022                                 division;R;r;workdir\n4023       division;workdir;startYear;endYear;r;R;numboot\n4024       division;workdir;startYear;endYear;r;R;numboot\n4025                                         division;R;r\n4030                                         division;R;r\n4031                                         division;R;r\n4032                                         division;R;r\n4033                                         division;R;r\n4034                                         division;R;r\n4035                                         division;R;r\n4036                                         division;R;r\n4037                                         division;R;r\n4038                                         division;R;r\n4039                                         division;R;r\n4040                                         division;R;r\n4041                                         division;R;r\n4042                                         division;R;r\n4043                                         division;R;r\n4044                                         division;R;r\n4045                                         division;R;r\n4046                                         division;R;r\n4048                                         division;R;r\n4049                                         division;R;r\n4050                                         division;R;r\n4051                                         division;R;r\n4053    mat2list;get.coef;plot.zelig.list;s.coef;sd.co...\n4054    mat2list;division;plot.zelig.list;s.coef;get.c...\n4055    mat2list;division;plot.zelig.list;s.coef;get.c...\n4056         txo;bsx;tde;bmp;abvo;abbr;RankOrderPlot;bmnr\n4061                                               CARIRT\n4062                    division;R;r;CPGregression;CARIRT\n4063             division;CPGregression;R;LDAIRT;CARIRT;r\n4064        division;CPGregression;LIRT;r;LDAIRT;CARIRT;R\n4065    division;LDAIRTcpp;CPGregression;LIRT;r;LIRTcp...\n4066    division;LDAIRTcpp;CPGregression;LIRT;r;LIRTcp...\n4067    RunMCMC;division;LDAIRTcpp;CPGregression;MCMCB...\n4068    RunMCMC;division;LDAIRTcpp;CPGregression;MCMCB...\n4069    RunMCMC;division;LDAIRTcpp;CPGregression;MCMCB...\n4070    RunMCMC;division;LDAIRTcpp;summary.LDAIRT;CPGr...\n4071    RunMCMC;division;LDAIRTcpp;summary.LDAIRT;CPGr...\n4072    MCMCBurn;RunMCMC;LDAIRT;emptyplot;shadeBands;s...\n4073    MCMCBurn;RunMCMC;LDAIRT;emptyplot;shadeBands;s...\n4074    Plot_Albedo;Read_HCHO_L2;Plot_Flight_tracks;Ne...\n4075    Plot_Albedo;Read_HCHO_L2;Plot_Flight_tracks;Ne...\n4076    Plot_Albedo;Read_HCHO_L2;Plot_Flight_tracks;Ne...\n4079    a;cy2;b;papsam;pap;i;dtsamcr;dtsub;pkyr;pubdt;...\n4084                                                 path\n4085                                    division;R;r;path\n4086                                    division;R;r;path\n4087                                    division;R;r;path\n4088                                    division;R;r;path\n4089                                    division;R;r;path\n4092    division;car.mean;ideol.lower;ideol.upper;lin....\n4093    car.mean;ideol.lower;ideol.upper;lin.mean;lin....\n4095                                         division;R;r\n4096                        robust;debug.print;pcse2;pcse\n4098                                         division;R;r\n4099                          division;glmdm;r;glmdmEST;R\n4102                                         division;R;r\n4108                                         division;R;r\n4109                    division;R;r;crossvalidate.number\n4110    division;r;full.data.models;R;crossvalidate.nu...\n4111                    division;R;r;crossvalidate.number\n4112                    division;R;r;crossvalidate.number\n4113                    division;R;r;crossvalidate.number\n4114                    division;R;r;crossvalidate.number\n4115                    division;R;r;crossvalidate.number\n4116    division;r;full.data.models;R;crossvalidate.nu...\n4117    division;r;full.data.models;R;crossvalidate.nu...\n4118    division;full.data.models;r;larger.se;R;crossv...\n4119    division;full.data.models;r;larger.se;R;crossv...\n4120    division;full.data.models;r;larger.se;R;loess....\n4122                                         division;R;r\nName: vars, dtype: object\n"
    }
   ],
   "source": [
    "pd.set_option('display.max_rows', df.shape[0]+1)\n",
    "print(df.vars.dropna())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2.7.16 64-bit ('env': virtualenv)",
   "language": "python",
   "name": "python271664bitenvvirtualenv8836d05012704695b9eba33d763507b5"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3-final"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}